0 Data

# Import the raw CSV export. The file is decoded as Big5, and both empty
# cells and the literal string "NA" are read as missing values.
df <- read.csv(
  file = "csv/20240805基本資料_retrospective.csv",
  fileEncoding = "Big5",
  na.strings = c("", "NA")
)

# Echo the imported table for a first visual check.
df

0.1 variables

# Per-column overview of the raw import. In the output below most columns are
# still character, and several numeric columns show implausible values (Inf in
# the two eGFR columns, negative follow-up months, a maximum weight of 646).
summary(df)
##       編號          性別           慢性病用藥狀況.複選. Comorbidity.多選. 
##  Min.   :   1   Length:6345        Length:6345          Length:6345       
##  1st Qu.:1726   Class :character   Class :character     Class :character  
##  Median :3634   Mode  :character   Mode  :character     Mode  :character  
##  Mean   :3548                                                             
##  3rd Qu.:5279                                                             
##  Max.   :6971                                                             
##  NA's   :1                                                                
##    醫院代碼         術前主述.複選.     Biopsy.date.確診日期.
##  Length:6345        Length:6345        Length:6345          
##  Class :character   Class :character   Class :character     
##  Mode  :character   Mode  :character   Mode  :character     
##                                                             
##                                                             
##                                                             
##                                                             
##  Biopsy.method.複選.  Cell.Type            多發性          切片檢體腫瘤惡性度
##  Length:6345         Length:6345        Length:6345        Length:6345       
##  Class :character    Class :character   Class :character   Class :character  
##  Mode  :character    Mode  :character   Mode  :character   Mode  :character  
##                                                                              
##                                                                              
##                                                                              
##                                                                              
##    病理分期         危險因子..複選.    合併膀胱腫瘤         合併CIS         
##  Length:6345        Length:6345        Length:6345        Length:6345       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  surgical.margin       左右側          腫瘤位置.多選.       腫瘤大小        
##  Length:6345        Length:6345        Length:6345        Length:6345       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  完整檢體腫瘤惡性度 pathological.stage   術前水腎        
##  Length:6345        Length:6345        Length:6345       
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##                                                          
##  Lymphovascular.invasion Tumor.Necrosis     有無針對UTUC化療  
##  Length:6345             Length:6345        Length:6345       
##  Class :character        Class :character   Class :character  
##  Mode  :character        Mode  :character   Mode  :character  
##                                                               
##                                                               
##                                                               
##                                                               
##  針對UTUC化療型態     化療處方          NxUx.date        
##  Length:6345        Length:6345        Length:6345       
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##                                                          
##  NxUx.Access.method..複選. Bladder.cuff.resection other.bladder.cuff.method
##  Length:6345               Length:6345            Length:6345              
##  Class :character          Class :character       Class :character         
##  Mode  :character          Mode  :character       Mode  :character         
##                                                                            
##                                                                            
##                                                                            
##                                                                            
##  Lymphadenectomy.位置.複選. simultaneously.ipsilateral.adrenalectomy
##  Length:6345                Length:6345                             
##  Class :character           Class :character                        
##  Mode  :character           Mode  :character                        
##                                                                     
##                                                                     
##                                                                     
##                                                                     
##  Post.operation.intravesical.C.T.instillation Endoscopic.resection.date
##  Length:6345                                  Length:6345              
##  Class :character                             Class :character         
##  Mode  :character                             Mode  :character         
##                                                                        
##                                                                        
##                                                                        
##                                                                        
##  Endoscopic.Access.method Endoscopic.Energy.device..複選.
##  Length:6345              Length:6345                    
##  Class :character         Class :character               
##  Mode  :character         Mode  :character               
##                                                          
##                                                          
##                                                          
##                                                          
##  endoscopic.ablation.次數 post.ablation.Intra.cavitary.C.T
##  Min.   : 0.000           Length:6345                     
##  1st Qu.: 1.000           Class :character                
##  Median : 2.000           Mode  :character                
##  Mean   : 2.416                                           
##  3rd Qu.: 3.000                                           
##  Max.   :15.000                                           
##  NA's   :5915                                             
##  Salvage.Nephroureterectomy date.of.salvage.NU date.of.segmental.resection
##  Length:6345                Length:6345        Length:6345                
##  Class :character           Class :character   Class :character           
##  Mode  :character           Mode  :character   Mode  :character           
##                                                                           
##                                                                           
##                                                                           
##                                                                           
##  Segmental.resection.Access.method..複選. salvage.Nephroureterectomy
##  Length:6345                              Length:6345               
##  Class :character                         Class :character          
##  Mode  :character                         Mode  :character          
##                                                                     
##                                                                     
##                                                                     
##                                                                     
##  Date.of.salvage.NU Clavien.Dindo.classification.複選.
##  Length:6345        Length:6345                       
##  Class :character   Class :character                  
##  Mode  :character   Mode  :character                  
##                                                       
##                                                       
##                                                       
##                                                       
##  complication.list..請填寫手術併發症中英文均可. 術後住院天數..day.
##  Length:6345                                    Min.   : 0.000    
##  Class :character                               1st Qu.: 6.000    
##  Mode  :character                               Median : 7.000    
##                                                 Mean   : 8.541    
##                                                 3rd Qu.: 9.000    
##                                                 Max.   :99.000    
##                                                 NA's   :2021      
##  Residual.bladder.cuff Date.of.last.cystoscopy
##  Length:6345           Length:6345            
##  Class :character      Class :character       
##  Mode  :character      Mode  :character       
##                                               
##                                               
##                                               
##                                               
##  Bladder.UC.after.NUx.or.Endoscopic.or.Segmental.resection
##  Length:6345                                              
##  Class :character                                         
##  Mode  :character                                         
##                                                           
##                                                           
##                                                           
##                                                           
##  Date.of.Bladder.UC.recurrence.after.NUx.or.Endoscopic.or.Segmental.resection
##  Length:6345                                                                 
##  Class :character                                                            
##  Mode  :character                                                            
##                                                                              
##                                                                              
##                                                                              
##                                                                              
##  患側Upper.ureter.or.renal.pelvis.local.recurrence
##  Length:6345                                      
##  Class :character                                 
##  Mode  :character                                 
##                                                   
##                                                   
##                                                   
##                                                   
##  Date.of.Upper.ureter.or.renal.pelvis.recurrence
##  Length:6345                                    
##  Class :character                               
##  Mode  :character                               
##                                                 
##                                                 
##                                                 
##                                                 
##  患側Lower.ureter.or.bladder.cuff..local.recurrence
##  Length:6345                                       
##  Class :character                                  
##  Mode  :character                                  
##                                                    
##                                                    
##                                                    
##                                                    
##  Date.of.Lower.ureter.or.bladder.cuff.recurrence 淋巴轉移及位置..複選.
##  Length:6345                                     Length:6345          
##  Class :character                                Class :character     
##  Mode  :character                                Mode  :character     
##                                                                       
##                                                                       
##                                                                       
##                                                                       
##  Date.of.LN.mets    遠端轉移.複選.     Date.of.distant.mets
##  Length:6345        Length:6345        Length:6345         
##  Class :character   Class :character   Class :character    
##  Mode  :character   Mode  :character   Mode  :character    
##                                                            
##                                                            
##                                                            
##                                                            
##  Disease.free.註.不含膀胱內復發.  Mortality         Date.of.mortality 
##  Length:6345                     Length:6345        Length:6345       
##  Class :character                Class :character   Class :character  
##  Mode  :character                Mode  :character   Mode  :character  
##                                                                       
##                                                                       
##                                                                       
##                                                                       
##  長期.Complication  手術到死亡間隔時間..月. Post.OP.1.month.eGFR
##  Length:6345        Min.   :     -51        Min.   : 0.3898     
##  Class :character   1st Qu.:      11        1st Qu.:57.5790     
##  Mode  :character   Median :      29        Median :    Inf     
##                     Mean   :    6254        Mean   :    Inf     
##                     3rd Qu.:      66        3rd Qu.:    Inf     
##                     Max.   :16708534        Max.   :    Inf     
##                     NA's   :3653            NA's   :79          
##    last.eGFR     Lost.follow.up     Longest.follow.up.month.for.BDFS.DFS
##  Min.   : 0.00   Length:6345        Min.   : -1.61                      
##  1st Qu.:24.80   Class :character   1st Qu.: 12.42                      
##  Median :54.41   Mode  :character   Median : 31.93                      
##  Mean   :  Inf                      Mean   : 43.21                      
##  3rd Qu.:  Inf                      3rd Qu.: 62.53                      
##  Max.   :  Inf                      Max.   :275.59                      
##  NA's   :43                         NA's   :326                         
##  Longest.follow.up.month.for.OS.CSS 死亡檔最後追蹤日期 死亡檔最長追蹤時間.month
##  Min.   :   0.00                    Length:6345        Min.   : -51.35         
##  1st Qu.:  20.86                    Class :character   1st Qu.:  21.48         
##  Median :  51.41                    Mode  :character   Median :  53.43         
##  Mean   :  65.76                                       Mean   :  66.14         
##  3rd Qu.:  95.27                                       3rd Qu.:  94.84         
##  Max.   :2023.10                                       Max.   :2023.10         
##  NA's   :52                                            NA's   :1001            
##      ECOG             ASA.score     身高..公分.         體重..公斤.    
##  Length:6345        Min.   :1.000   Length:6345        Min.   : 17.20  
##  Class :character   1st Qu.:2.000   Class :character   1st Qu.: 52.00  
##  Mode  :character   Median :3.000   Mode  :character   Median : 60.00  
##                     Mean   :2.581                      Mean   : 61.07  
##                     3rd Qu.:3.000                      3rd Qu.: 68.00  
##                     Max.   :4.000                      Max.   :646.00  
##                     NA's   :2322                       NA's   :1858    
##      生日              診斷年紀      術前Cr.level..mg.dl.
##  Length:6345        Min.   :  8.00   Length:6345         
##  Class :character   1st Qu.: 61.33   Class :character    
##  Mode  :character   Median : 69.18   Mode  :character    
##                     Mean   : 68.31                       
##                     3rd Qu.: 76.03                       
##                     Max.   :101.72                       
##                     NA's   :49

0.2 variable names

# List the column names exactly as read.csv() produced them; the rename step
# further down replaces them by position, so this order matters.
colnames(df)
##  [1] "編號"                                                                        
##  [2] "性別"                                                                        
##  [3] "慢性病用藥狀況.複選."                                                        
##  [4] "Comorbidity.多選."                                                           
##  [5] "醫院代碼"                                                                    
##  [6] "術前主述.複選."                                                              
##  [7] "Biopsy.date.確診日期."                                                       
##  [8] "Biopsy.method.複選."                                                         
##  [9] "Cell.Type"                                                                   
## [10] "多發性"                                                                      
## [11] "切片檢體腫瘤惡性度"                                                          
## [12] "病理分期"                                                                    
## [13] "危險因子..複選."                                                             
## [14] "合併膀胱腫瘤"                                                                
## [15] "合併CIS"                                                                     
## [16] "surgical.margin"                                                             
## [17] "左右側"                                                                      
## [18] "腫瘤位置.多選."                                                              
## [19] "腫瘤大小"                                                                    
## [20] "完整檢體腫瘤惡性度"                                                          
## [21] "pathological.stage"                                                          
## [22] "術前水腎"                                                                    
## [23] "Lymphovascular.invasion"                                                     
## [24] "Tumor.Necrosis"                                                              
## [25] "有無針對UTUC化療"                                                            
## [26] "針對UTUC化療型態"                                                            
## [27] "化療處方"                                                                    
## [28] "NxUx.date"                                                                   
## [29] "NxUx.Access.method..複選."                                                   
## [30] "Bladder.cuff.resection"                                                      
## [31] "other.bladder.cuff.method"                                                   
## [32] "Lymphadenectomy.位置.複選."                                                  
## [33] "simultaneously.ipsilateral.adrenalectomy"                                    
## [34] "Post.operation.intravesical.C.T.instillation"                                
## [35] "Endoscopic.resection.date"                                                   
## [36] "Endoscopic.Access.method"                                                    
## [37] "Endoscopic.Energy.device..複選."                                             
## [38] "endoscopic.ablation.次數"                                                    
## [39] "post.ablation.Intra.cavitary.C.T"                                            
## [40] "Salvage.Nephroureterectomy"                                                  
## [41] "date.of.salvage.NU"                                                          
## [42] "date.of.segmental.resection"                                                 
## [43] "Segmental.resection.Access.method..複選."                                    
## [44] "salvage.Nephroureterectomy"                                                  
## [45] "Date.of.salvage.NU"                                                          
## [46] "Clavien.Dindo.classification.複選."                                          
## [47] "complication.list..請填寫手術併發症中英文均可."                              
## [48] "術後住院天數..day."                                                          
## [49] "Residual.bladder.cuff"                                                       
## [50] "Date.of.last.cystoscopy"                                                     
## [51] "Bladder.UC.after.NUx.or.Endoscopic.or.Segmental.resection"                   
## [52] "Date.of.Bladder.UC.recurrence.after.NUx.or.Endoscopic.or.Segmental.resection"
## [53] "患側Upper.ureter.or.renal.pelvis.local.recurrence"                           
## [54] "Date.of.Upper.ureter.or.renal.pelvis.recurrence"                             
## [55] "患側Lower.ureter.or.bladder.cuff..local.recurrence"                          
## [56] "Date.of.Lower.ureter.or.bladder.cuff.recurrence"                             
## [57] "淋巴轉移及位置..複選."                                                       
## [58] "Date.of.LN.mets"                                                             
## [59] "遠端轉移.複選."                                                              
## [60] "Date.of.distant.mets"                                                        
## [61] "Disease.free.註.不含膀胱內復發."                                             
## [62] "Mortality"                                                                   
## [63] "Date.of.mortality"                                                           
## [64] "長期.Complication"                                                           
## [65] "手術到死亡間隔時間..月."                                                     
## [66] "Post.OP.1.month.eGFR"                                                        
## [67] "last.eGFR"                                                                   
## [68] "Lost.follow.up"                                                              
## [69] "Longest.follow.up.month.for.BDFS.DFS"                                        
## [70] "Longest.follow.up.month.for.OS.CSS"                                          
## [71] "死亡檔最後追蹤日期"                                                          
## [72] "死亡檔最長追蹤時間.month"                                                    
## [73] "ECOG"                                                                        
## [74] "ASA.score"                                                                   
## [75] "身高..公分."                                                                 
## [76] "體重..公斤."                                                                 
## [77] "生日"                                                                        
## [78] "診斷年紀"                                                                    
## [79] "術前Cr.level..mg.dl."

0.3 rename

# Replace all 79 column names by position with cleaned names (annotation
# suffixes dropped, dots turned into underscores). The order of this vector
# must match the column order printed by colnames(df) above.
# NOTE(review): some names differ only by letter case, e.g.
# "Salvage_Nephroureterectomy" vs "salvage_Nephroureterectomy" and
# "date_of_salvage_NU" vs "Date_of_salvage_NU"; these are easy to confuse.
colnames(df) <- c("編號", "性別", "慢性病用藥狀況", "Comorbidity", "醫院代碼", "術前主述", "Biopsy_date_確診日期", "Biopsy_method", "Cell_Type", "多發性", "切片檢體腫瘤惡性度", "病理分期", "危險因子", "合併膀胱腫瘤", "合併CIS", "surgical_margin", "左右側", "腫瘤位置", "腫瘤大小", "完整檢體腫瘤惡性度", "pathological_stage", "術前水腎", "Lymphovascular_invasion", "Tumor_Necrosis", "有無針對UTUC化療", "針對UTUC化療型態", "化療處方", "NxUx_date", "NxUx_Access_method", "Bladder_cuff_resection", "other_bladder_cuff_method", "Lymphadenectomy_位置", "simultaneously_ipsilateral_adrenalectomy", "Post_operation_intravesical_CT_instillation", "Endoscopic_resection_date", "Endoscopic_Access_method", "Endoscopic_Energy_device", "endoscopic_ablation_次數", "post_ablation_Intra_cavitary_CT", "Salvage_Nephroureterectomy", "date_of_salvage_NU", "date_of_segmental_resection", "Segmental_resection_Access_method", "salvage_Nephroureterectomy", "Date_of_salvage_NU", "Clavien_Dindo_classification", "complication_list", "術後住院天數_天", "Residual_bladder_cuff", "Date_of_last_cystoscopy", "Bladder_UC_after_NUx_or_Endoscopic_or_Segmental_resection", "Date_of_Bladder_UC_recurrence_after_NUx_or_Endoscopic_or_Segmental_resection", "患側Upper_ureter_or_renal_pelvis_local_recurrence", "Date_of_Upper_ureter_or_renal_pelvis_recurrence", "患側Lower_ureter_or_bladder_cuff_local_recurrence", "Date_of_Lower_ureter_or_bladder_cuff_recurrence", "淋巴轉移及位置", "Date_of_LN_mets", "遠端轉移", "Date_of_distant_mets", "Disease_free", "Mortality", "Date_of_mortality", "長期_Complication", "手術到死亡間隔時間_月", "Post_OP_1_month_eGFR", "last_eGFR", "Lost_follow_up", "Longest_follow_up_month_for_BDFS_DFS", "Longest_follow_up_month_for_OS_CSS", "死亡檔最後追蹤日期", "死亡檔最長追蹤時間_月", "ECOG", "ASA", "身高", "體重", "生日", "診斷年紀", "術前Cr_level_mg_dl")

0.4 drop missing rows

# Positions of rows that are mostly empty: more than 50 of the 79 cells are NA.
# The NA count is computed for all rows at once with rowSums(), so the step no
# longer depends on a hard-coded row count (previously 1:6345) and does not
# grow a vector inside a loop.
n_missing <- unname(which(rowSums(is.na(df)) > 50))

# Keep the remaining rows by positive index. Negative indexing with an empty
# index (`df[-n_missing, ]` when no row is flagged) would select zero rows and
# silently discard the whole table.
df1 <- df[setdiff(seq_len(nrow(df)), n_missing), ]

1 Selection

1.1 package

library(dplyr)

1.2 select()

# Keep only the 15 columns listed here; every later step works on this subset.
df2 <- dplyr::select(
  df1,
  編號, 性別, ECOG, 身高, 體重, 生日,
  Comorbidity, 腫瘤位置, 腫瘤大小, pathological_stage,
  Mortality, Date_of_mortality,
  術前Cr_level_mg_dl, Post_OP_1_month_eGFR,
  死亡檔最長追蹤時間_月
)

df2

2 Type

# Convert every selected column to its working type in one transform() call.
# Each expression reads only its own column, so a single call yields the same
# result as converting the columns one at a time.
# as.numeric() on the character columns emits "NAs introduced by coercion" for
# entries that are not plain numbers.
df3 <- transform(
  df2,
  # Categorical columns -> factor
  性別 = as.factor(性別),
  ECOG = as.factor(ECOG),
  Comorbidity = as.factor(Comorbidity),
  腫瘤位置 = as.factor(腫瘤位置),
  腫瘤大小 = as.factor(腫瘤大小),
  pathological_stage = as.factor(pathological_stage),
  Mortality = as.factor(Mortality),
  # Measurements -> numeric
  身高 = as.numeric(身高),
  體重 = as.numeric(體重),
  術前Cr_level_mg_dl = as.numeric(術前Cr_level_mg_dl),
  Post_OP_1_month_eGFR = as.numeric(Post_OP_1_month_eGFR),
  死亡檔最長追蹤時間_月 = as.numeric(死亡檔最長追蹤時間_月),
  # Dates are parsed as month/day/four-digit-year
  生日 = as.Date(生日, "%m/%d/%Y"),
  Date_of_mortality = as.Date(Date_of_mortality, "%m/%d/%Y")
)
## Warning in eval(substitute(list(...)), `_data`, parent.frame()): NAs introduced
## by coercion
## Warning in eval(substitute(list(...)), `_data`, parent.frame()): NAs introduced
## by coercion
# Echo the type-converted table.
df3

2.1 Factor

# Preview of the factor conversions only. Nothing is assigned; the result is
# just printed. 腫瘤大小 is converted as well but is not among the columns shown.
df2 %>%
  transform(
    性別 = as.factor(性別),
    ECOG = as.factor(ECOG),
    Comorbidity = as.factor(Comorbidity),
    腫瘤位置 = as.factor(腫瘤位置),
    腫瘤大小 = as.factor(腫瘤大小),
    pathological_stage = as.factor(pathological_stage),
    Mortality = as.factor(Mortality)
  ) %>%
  dplyr::select(
    性別, ECOG, Comorbidity, 腫瘤位置, pathological_stage, Mortality
  )

2.2 Numeric

# Preview of the numeric conversions only. Nothing is assigned; the result is
# just printed. Character entries that are not plain numbers become NA and
# trigger the coercion warnings shown below.
df2 %>%
  transform(
    身高 = as.numeric(身高),
    體重 = as.numeric(體重),
    術前Cr_level_mg_dl = as.numeric(術前Cr_level_mg_dl),
    Post_OP_1_month_eGFR = as.numeric(Post_OP_1_month_eGFR),
    死亡檔最長追蹤時間_月 = as.numeric(死亡檔最長追蹤時間_月)
  ) %>%
  dplyr::select(
    身高, 體重, 術前Cr_level_mg_dl, Post_OP_1_month_eGFR,
    死亡檔最長追蹤時間_月
  )
## Warning in eval(substitute(list(...)), `_data`, parent.frame()): NAs introduced
## by coercion
## Warning in eval(substitute(list(...)), `_data`, parent.frame()): NAs introduced
## by coercion

2.3 Date

# Preview of the date conversions only. Nothing is assigned; the result is
# just printed. Both columns are parsed as month/day/four-digit-year; strings
# that do not match that format become NA.
# select() is namespace-qualified like every other block in this file, so a
# later-attached package that also exports select() cannot mask it.
df2 %>%
  transform(
    生日 = as.Date(生日, "%m/%d/%Y"),
    Date_of_mortality = as.Date(Date_of_mortality, "%m/%d/%Y")
  ) %>%
  dplyr::select(生日, Date_of_mortality)

2.4 Recap

data frame

# Recap: echo the type-converted table once more.
df3

summary()

# Summary after type conversion. The output below shows that some factor
# levels are still duplicated under different spellings (e.g. "3 Nonknown" and
# "3 死因不明" in Mortality; "<" and "&#60;" variants in 腫瘤大小) and that
# Post_OP_1_month_eGFR contains Inf.
summary(df3)
##       編號        性別                               ECOG           身高      
##  Min.   :   1   1 男:2731   0 無症狀                   :2090   Min.   :128.0  
##  1st Qu.:1701   2 女:3539   1 有症狀,可步行對生活不影響:1799   1st Qu.:152.0  
##  Median :3604               2 躺在床上時間小於50%      : 324   Median :158.0  
##  Mean   :3518               3 躺在床上時間大於50%      :  48   Mean   :158.1  
##  3rd Qu.:5232               4 完全臥床                 :  15   3rd Qu.:164.0  
##  Max.   :6961               NA's                       :1994   Max.   :189.0  
##                                                                NA's   :1825   
##       體重             生日           
##  Min.   : 17.20   Min.   :1907-10-01  
##  1st Qu.: 52.00   1st Qu.:1936-07-01  
##  Median : 60.00   Median :1943-11-19  
##  Mean   : 61.08   Mean   :1944-07-17  
##  3rd Qu.: 68.00   3rd Qu.:1952-04-15  
##  Max.   :646.00   Max.   :1992-12-20  
##  NA's   :1797     NA's   :621         
##                              Comorbidity                 腫瘤位置   
##  0 none                            :1248   1 腎盂            :2564  
##  5 HTN                             : 744   4 下輸尿管        : 808  
##  5 HTN, 10 DM                      : 411   2 上輸尿管        : 653  
##  10 DM                             : 201   3 中輸尿管        : 520  
##  16 malignancy (非UTUC/ bladder UC): 144   1 腎盂, 2 上輸尿管: 511  
##  (Other)                           :3062   (Other)           :1210  
##  NA's                              : 460   NA's              :   4  
##               腫瘤大小       pathological_stage              Mortality   
##  4 ? 3cm          :2238   4 pT3, 9 pNx:1035     0 no              :2781  
##  3 ?2 & &#60; 3 cm: 552   2 pT1, 9 pNx:1027     1 UTUC related    :1244  
##  2 ?1 & &#60; 2 cm: 519   3 pT2, 9 pNx: 743     2 non-UTUC related:1218  
##  3 ?2 & < 3 cm    : 505   9 pNx, 1 pTa: 655     3 Nonknown        : 567  
##  2 ?1 & < 2 cm    : 497   4 pT3, 6 pN0: 333     3 死因不明        : 259  
##  (Other)          : 910   (Other)     :1697     (Other)           : 179  
##  NA's             :1049   NA's        : 780     NA's              :  22  
##  Date_of_mortality    術前Cr_level_mg_dl Post_OP_1_month_eGFR
##  Min.   :1989-02-10   Min.   : 0.190     Min.   : 0.3898     
##  1st Qu.:2012-03-22   1st Qu.: 1.000     1st Qu.:57.1777     
##  Median :2016-04-03   Median : 1.350     Median :    Inf     
##  Mean   :2014-09-08   Mean   : 2.361     Mean   :    Inf     
##  3rd Qu.:2019-01-14   3rd Qu.: 2.100     3rd Qu.:    Inf     
##  Max.   :2024-06-12   Max.   :21.860     Max.   :    Inf     
##  NA's   :3583         NA's   :565        NA's   :77          
##  死亡檔最長追蹤時間_月
##  Min.   : -51.35      
##  1st Qu.:  21.53      
##  Median :  53.50      
##  Mean   :  66.16      
##  3rd Qu.:  94.81      
##  Max.   :2023.10      
##  NA's   :932

3 Detail

# Build df4 from df3 by recoding the raw columns into labelled factors.
# Every label starts with a numeric code ("0 ...", "1 ...").
#
#   BMI              two groups split at 25, from 體重 / (身高 / 100)^2
#                    (presumably kg and cm -- TODO confirm); NA when the
#                    ratio cannot be computed. 身高 and 體重 are dropped.
#   Comorbidity      whether the entry contains "5 HTN"; NA stays NA.
#   腫瘤位置         collapsed to six groups; NA and "7 Not available"
#                    share "5 無法識別", anything unlisted is "4 多個位置".
#   腫瘤大小         NA / "Not available" -> "0 Not available"; entries
#                    containing "3" or "4" -> "2 >=2cm"; the rest "1 <2cm".
#   pT / pN / pM     parsed from pathological_stage, which is then dropped.
#   Mortality        recoded on the first of the digits 0-4 found, in order.
#   Age_of_Mortality whole years between 生日 and Date_of_mortality.
df4 <- df3 %>%
  mutate(
    # if_else() returns NA when the comparison is NA, i.e. when height or
    # weight is missing.
    BMI = factor(if_else(
      體重 / (身高 / 100)^2 < 25, "0 Normal", "1 Overweight"
    )),
    .after = 體重
  ) %>%
  dplyr::select(-身高, -體重) %>%
  mutate(
    Comorbidity = as.factor(case_when(
      is.na(Comorbidity) ~ NA_character_,
      grepl("5 HTN", Comorbidity) ~ "1 HTN",
      TRUE ~ "0 No HTN"
    ))
  ) %>%
  mutate(
    腫瘤位置 = as.factor(case_when(
      is.na(腫瘤位置) ~ "5 無法識別",
      腫瘤位置 == "0 non-visible" ~ "0 non-visible",
      腫瘤位置 == "1 腎盂" ~ "1 腎盂",
      腫瘤位置 %in% c(
        "2 上輸尿管", "3 中輸尿管", "4 下輸尿管", "6 Urerter, unknown site"
      ) ~ "2 輸尿管",
      腫瘤位置 == "5 膀胱袖口" ~ "3 膀胱袖口",
      腫瘤位置 == "7 Not available" ~ "5 無法識別",
      TRUE ~ "4 多個位置"
    ))
  ) %>%
  mutate(
    腫瘤大小 = as.factor(case_when(
      is.na(腫瘤大小) | 腫瘤大小 == "Not available" ~ "0 Not available",
      grepl("3", 腫瘤大小) | grepl("4", 腫瘤大小) ~ "2 >=2cm",
      TRUE ~ "1 <2cm"
    ))
  ) %>%
  mutate(
    # Order matters: the first matching T token decides the group.
    pT = as.factor(case_when(
      is.na(pathological_stage) ~ NA_character_,
      grepl("T0", pathological_stage) |
        grepl("Tis", pathological_stage) |
        grepl("Ta", pathological_stage) ~ "1 T0/Tis/Ta",
      grepl("T1", pathological_stage) ~ "2 T1",
      grepl("T2", pathological_stage) ~ "3 T2",
      grepl("T3", pathological_stage) ~ "4 T3",
      grepl("T4", pathological_stage) ~ "5 T4",
      TRUE ~ "0 None"
    )),
    .after = pathological_stage
  ) %>%
  mutate(
    # N status is detected through the option codes followed by a space.
    pN = as.factor(case_when(
      is.na(pathological_stage) ~ NA_character_,
      grepl("9 ", pathological_stage) ~ "1 Nx",
      grepl("6 ", pathological_stage) ~ "2 N0",
      grepl("7 ", pathological_stage) ~ "3 N1",
      grepl("8 ", pathological_stage) ~ "4 N2",
      TRUE ~ "0 None"
    )),
    .after = pT
  ) %>%
  mutate(
    pM = as.factor(case_when(
      is.na(pathological_stage) ~ NA_character_,
      grepl("12 ", pathological_stage) ~ "1 M0",
      grepl("11 ", pathological_stage) ~ "2 M1",
      TRUE ~ "0 None"
    )),
    .after = pN
  ) %>%
  dplyr::select(-pathological_stage) %>%
  mutate(
    Mortality = as.factor(case_when(
      is.na(Mortality) ~ NA_character_,
      grepl("0", Mortality) ~ "0 No",
      grepl("1", Mortality) ~ "1 UTUC related",
      grepl("2", Mortality) ~ "2 Non-UTUC related",
      grepl("3", Mortality) ~ "3 死因不明",
      grepl("4", Mortality) ~ "4 Surgery related",
      TRUE ~ NA_character_
    ))
  ) %>%
  mutate(
    # units = "days" is explicit because difftime() otherwise chooses the
    # unit from the smallest difference in the column. 365.25 accounts for
    # leap years. Missing dates propagate to NA on their own.
    Age_of_Mortality = round(
      as.numeric(difftime(Date_of_mortality, 生日, units = "days")) / 365.25,
      0
    ),
    .after = Date_of_mortality
  )

df4

3.1 Functions

  • mutate()
  • ifelse()
    • is.na()
    • grepl()
  • round()

3.2 Variables


3.2.1 BMI

# Show height and weight next to the continuous BMI (rounded to 2 digits)
# and its two-level grouping. The grouping uses the unrounded ratio and is
# NA whenever the ratio is NA.
df3 %>%
  mutate(
    BMI = round(體重 / (身高 / 100)^2, digits = 2),
    .after = 體重
  ) %>%
  mutate(
    BMI.group = factor(if_else(
      體重 / (身高 / 100)^2 < 25, "Normal", "Overweight"
    )),
    .after = BMI
  ) %>%
  dplyr::select(身高, 體重, BMI, BMI.group)

3.2.2 Comorbidity

# Compare the raw Comorbidity entry with the derived hypertension flag:
# NA stays NA, entries containing "5 HTN" become "1 HTN", all others
# "0 No HTN".
df3 %>%
  mutate(
    Comorbidity.HTN = as.factor(case_when(
      is.na(Comorbidity) ~ NA_character_,
      grepl("5 HTN", Comorbidity) ~ "1 HTN",
      TRUE ~ "0 No HTN"
    ))
  ) %>%
  dplyr::select(Comorbidity, Comorbidity.HTN)

3.2.3 腫瘤位置

# Compare the raw 腫瘤位置 value with its collapsed grouping. Missing values
# and "7 Not available" share one level; any value not listed explicitly
# falls into "4 多個位置".
df3 %>%
  mutate(
    腫瘤位置.group = as.factor(case_when(
      is.na(腫瘤位置) ~ "5 無法識別",
      腫瘤位置 == "0 non-visible" ~ "0 non-visible",
      腫瘤位置 == "1 腎盂" ~ "1 腎盂",
      腫瘤位置 %in% c(
        "2 上輸尿管", "3 中輸尿管", "4 下輸尿管", "6 Urerter, unknown site"
      ) ~ "2 輸尿管",
      腫瘤位置 == "5 膀胱袖口" ~ "3 膀胱袖口",
      腫瘤位置 == "7 Not available" ~ "5 無法識別",
      TRUE ~ "4 多個位置"
    ))
  ) %>%
  dplyr::select(腫瘤位置, 腫瘤位置.group)

3.2.4 腫瘤大小

# Compare the raw 腫瘤大小 value with the 2 cm split: NA / "Not available"
# form their own level, entries containing "3" or "4" are ">=2cm", and
# everything else is "<2cm".
df3 %>%
  mutate(
    腫瘤大小.2cm = as.factor(case_when(
      is.na(腫瘤大小) | 腫瘤大小 == "Not available" ~ "0 Not available",
      grepl("3", 腫瘤大小) | grepl("4", 腫瘤大小) ~ "2 >=2cm",
      TRUE ~ "1 <2cm"
    ))
  ) %>%
  dplyr::select(腫瘤大小, 腫瘤大小.2cm)

3.2.5 Pathological Stage

# Split pathological_stage into separate T, N and M factors and show them
# beside the raw value. A missing stage gives NA in all three; a stage with
# no matching token gives "None". The first matching rule wins.
df3 %>%
  mutate(
    pT = as.factor(case_when(
      is.na(pathological_stage) ~ NA_character_,
      grepl("T0", pathological_stage) |
        grepl("Tis", pathological_stage) |
        grepl("Ta", pathological_stage) ~ "T0/Tis/Ta",
      grepl("T1", pathological_stage) ~ "T1",
      grepl("T2", pathological_stage) ~ "T2",
      grepl("T3", pathological_stage) ~ "T3",
      grepl("T4", pathological_stage) ~ "T4",
      TRUE ~ "None"
    )),
    .after = pathological_stage
  ) %>%
  mutate(
    # N status is detected through the option codes followed by a space.
    pN = as.factor(case_when(
      is.na(pathological_stage) ~ NA_character_,
      grepl("9 ", pathological_stage) ~ "Nx",
      grepl("6 ", pathological_stage) ~ "N0",
      grepl("7 ", pathological_stage) ~ "N1",
      grepl("8 ", pathological_stage) ~ "N2",
      TRUE ~ "None"
    )),
    .after = pT
  ) %>%
  mutate(
    pM = as.factor(case_when(
      is.na(pathological_stage) ~ NA_character_,
      grepl("12 ", pathological_stage) ~ "M0",
      grepl("11 ", pathological_stage) ~ "M1",
      TRUE ~ "None"
    )),
    .after = pN
  ) %>%
  dplyr::select(pathological_stage, pT, pN, pM)

3.2.6 Mortality

# Compare the raw Mortality entry with its recoded factor. The digits 0-4
# are searched in ascending order and the first one found decides the
# label; missing entries and entries without any of those digits become NA.
df3 %>%
  mutate(
    Mortality.na = as.factor(case_when(
      is.na(Mortality) ~ NA_character_,
      grepl("0", Mortality) ~ "0 No",
      grepl("1", Mortality) ~ "1 UTUC related",
      grepl("2", Mortality) ~ "2 Non-UTUC related",
      grepl("3", Mortality) ~ "3 死因不明",
      grepl("4", Mortality) ~ "4 Surgery related",
      TRUE ~ NA_character_
    ))
  ) %>%
  dplyr::select(Mortality, Mortality.na)

3.2.7 Age of Mortality

# Age at death in whole years, shown beside the two dates it is derived from.
# units = "days" is explicit because difftime() otherwise chooses the unit
# from the smallest difference in the column, which would make the division
# below wrong. 365.25 accounts for leap years. A missing date on either side
# propagates to NA.
df3 %>%
  mutate(
    Age_of_Mortality = round(
      as.numeric(difftime(Date_of_mortality, 生日, units = "days")) / 365.25,
      0
    ),
    .after = Date_of_mortality
  ) %>%
  dplyr::select(生日, Date_of_mortality, Age_of_Mortality)

4 Missing value

# Complete-case subset of df4: a row is kept only when none of its columns
# is NA.
# NOTE(review): Date_of_mortality is one of df4's columns, so rows without a
# recorded death date are removed here as well -- confirm this is intended.
df5 <- df4 %>%
  filter(!if_any(everything(), is.na))

df5

5 Numerical data frame

# Numeric version of the modelling columns of df5.
#
# Factor (or character) columns carry labels such as "1 HTN"; each is
# replaced by the first run of digits in its label, converted to numeric.
# Columns that are already numeric (編號, 死亡檔最長追蹤時間_月) are left
# untouched. Passing them through the digit-extraction regex kept only the
# integer part of the follow-up time (e.g. 0.0657 became 0) and would drop
# the sign of a negative value.
df6 <- df5 %>%
  dplyr::select(
    編號, 性別, ECOG, BMI, Comorbidity, 腫瘤位置, 腫瘤大小,
    pT, pN, pM, Mortality, 死亡檔最長追蹤時間_月
  ) %>%
  mutate(across(
    where(~ is.factor(.x) | is.character(.x)),
    ~ as.numeric(gsub(".*?([0-9]+).*", "\\1", as.character(.x)))
  ))

df6

6 Results

6.1 data frame

table

df5

summary()

summary(df5)
##       編號          性別                              ECOG    
##  Min.   :   4.0   1 男:519   0 無症狀                   :499  
##  1st Qu.: 954.8   2 女:579   1 有症狀,可步行對生活不影響:468  
##  Median :3221.5              2 躺在床上時間小於50%      :111  
##  Mean   :3262.5              3 躺在床上時間大於50%      : 15  
##  3rd Qu.:4929.0              4 完全臥床                 :  5  
##  Max.   :6961.0                                               
##            BMI           生日              Comorbidity           腫瘤位置  
##  0 Normal    :699   Min.   :1913-11-22   0 No HTN:406   0 non-visible:  1  
##  1 Overweight:399   1st Qu.:1936-03-05   1 HTN   :692   1 腎盂       :370  
##                     Median :1942-04-25                  2 輸尿管     :368  
##                     Mean   :1943-08-14                  3 膀胱袖口   :  5  
##                     3rd Qu.:1950-10-26                  4 多個位置   :353  
##                     Max.   :1989-07-25                  5 無法識別   :  1  
##             腫瘤大小             pT           pN           pM     
##  0 Not available: 13   0 None     :  1   0 None:  1   0 None:971  
##  1 <2cm         :251   1 T0/Tis/Ta:134   1 Nx  :794   1 M0  :122  
##  2 >=2cm        :834   2 T1       :200   2 N0  :213   2 M1  :  5  
##                        3 T2       :196   3 N1  : 31               
##                        4 T3       :461   4 N2  : 59               
##                        5 T4       :106                            
##               Mortality   Date_of_mortality    Age_of_Mortality
##  0 No              :  7   Min.   :2007-06-14   Min.   : 31.00  
##  1 UTUC related    :484   1st Qu.:2016-01-20   1st Qu.: 68.00  
##  2 Non-UTUC related:496   Median :2018-05-09   Median : 75.00  
##  3 死因不明        : 85   Mean   :2017-11-10   Mean   : 74.31  
##  4 Surgery related : 26   3rd Qu.:2020-03-25   3rd Qu.: 82.00  
##                           Max.   :2024-06-12   Max.   :103.00  
##  術前Cr_level_mg_dl Post_OP_1_month_eGFR 死亡檔最長追蹤時間_月
##  Min.   : 0.470     Min.   : 2.049       Min.   :  0.0657     
##  1st Qu.: 1.080     1st Qu.:43.973       1st Qu.: 10.6932     
##  Median : 1.485     Median :   Inf       Median : 26.7411     
##  Mean   : 2.625     Mean   :   Inf       Mean   : 35.5088     
##  3rd Qu.: 2.895     3rd Qu.:   Inf       3rd Qu.: 49.3183     
##  Max.   :21.860     Max.   :   Inf       Max.   :170.5322

6.2 numerical data frame

table

df6

summary()

summary(df6)
##       編號             性別            ECOG             BMI        
##  Min.   :   4.0   Min.   :1.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.: 954.8   1st Qu.:1.000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :3221.5   Median :2.000   Median :1.0000   Median :0.0000  
##  Mean   :3262.5   Mean   :1.527   Mean   :0.6876   Mean   :0.3634  
##  3rd Qu.:4929.0   3rd Qu.:2.000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :6961.0   Max.   :2.000   Max.   :4.0000   Max.   :1.0000  
##   Comorbidity        腫瘤位置        腫瘤大小           pT       
##  Min.   :0.0000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.0000   1st Qu.:1.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median :1.0000   Median :2.000   Median :2.000   Median :4.000  
##  Mean   :0.6302   Mean   :2.311   Mean   :1.748   Mean   :3.184  
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:2.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :5.000   Max.   :2.000   Max.   :5.000  
##        pN              pM           Mortality     死亡檔最長追蹤時間_月
##  Min.   :0.000   Min.   :0.0000   Min.   :0.000   Min.   :  0.00       
##  1st Qu.:1.000   1st Qu.:0.0000   1st Qu.:1.000   1st Qu.: 10.00       
##  Median :1.000   Median :0.0000   Median :2.000   Median : 26.00       
##  Mean   :1.411   Mean   :0.1202   Mean   :1.671   Mean   : 35.01       
##  3rd Qu.:2.000   3rd Qu.:0.0000   3rd Qu.:2.000   3rd Qu.: 49.00       
##  Max.   :4.000   Max.   :2.0000   Max.   :4.000   Max.   :170.00